Trail 1:Initailly when I tried to run Homoney integration after runing pca on indiviudal samples and then merged the samples it failed. Trail2: I will merge the samples after findvariables step and then perform scaling and pca and then run harmony. Trail3: Merge the sample datasets before performing any steps of QC to save time and computational power trail4: it does not make any sense to plot the viloin plot with two samples in one plot. so we are going back, removing the merged datstet perfrom the QC with individual samples and then merging and then going for the data normalization step (downstream analysis)

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
#library(Matrix)
library(Seurat)
## Warning: package 'Seurat' was built under R version 4.4.1
## Loading required package: SeuratObject
## Warning: package 'SeuratObject' was built under R version 4.4.1
## Loading required package: sp
## Warning: package 'sp' was built under R version 4.4.1
## 
## Attaching package: 'SeuratObject'
## The following objects are masked from 'package:base':
## 
##     intersect, t
library(patchwork)
## Warning: package 'patchwork' was built under R version 4.4.1

Load the data: Rename the file names to barcodes, features and matrix as Read10X does not recognise other file name.

sam1 <- Read10X("~/Documents/Drive G/Personal Project/sc_RNAseq tutorial/sample/sample_1")
sam2 <- Read10X("~/Documents/Drive G/Personal Project/sc_RNAseq tutorial/sample/sample_2")

Convert the files into seurat objects

sam1 <- CreateSeuratObject(counts = sam1, min.cells = 3, min.features = 200, project = "sam1")
sam2 <- CreateSeuratObject(counts = sam2, min.cells= 3, min.features = 200, project ="sam2")

mitochondrial counts

sam1[["percent.mt"]] <- PercentageFeatureSet(sam1, pattern = "^MT-")
sam2[["percent.mt"]] <- PercentageFeatureSet(sam2, pattern = "^MT-")

View

#View(sam1@meta.data)
#View(sam2@meta.data)

Violin plot

#VlnPlot(Merged_sam, features = c("nFeature_RNA","nCount_RNA","percent.mt"), ncol = 3)
VlnPlot(sam1, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
## Warning: Default search for "data" layer in "RNA" assay yielded no results;
## utilizing "counts" layer instead.
## Warning: The `slot` argument of `FetchData()` is deprecated as of SeuratObject 5.0.0.
## ℹ Please use the `layer` argument instead.
## ℹ The deprecated feature was likely used in the Seurat package.
##   Please report the issue at <https://github.com/satijalab/seurat/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: `PackageCheck()` was deprecated in SeuratObject 5.0.0.
## ℹ Please use `rlang::check_installed()` instead.
## ℹ The deprecated feature was likely used in the Seurat package.
##   Please report the issue at <https://github.com/satijalab/seurat/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

VlnPlot(sam2, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
## Warning: Default search for "data" layer in "RNA" assay yielded no results;
## utilizing "counts" layer instead.

perform QC

#Merged_sam <- subset(Merged_sam, subset = nFeature_RNA > 200 & nFeature_RNA < 6500 & percent.mt < 15 )
sam1 <- subset(sam1, subset = nFeature_RNA > 200 & nFeature_RNA < 6500 & percent.mt < 15)
sam2 <- subset(sam2, subset = nFeature_RNA > 200 & nFeature_RNA < 6500 & percent.mt < 15)

View violin plot after QC

#VlnPlot(Merged_sam, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3 )
VlnPlot(sam1, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
## Warning: Default search for "data" layer in "RNA" assay yielded no results;
## utilizing "counts" layer instead.

VlnPlot(sam2, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
## Warning: Default search for "data" layer in "RNA" assay yielded no results;
## utilizing "counts" layer instead.

Merge samples

Merged_sam <- merge(sam1,sam2, add.cell.ids = c("sam1","sam2"), project = "Merged_sam")
#rm(Merged_sam)

Pre-processing

Normalization

Merged_sam <- NormalizeData(Merged_sam)
## Normalizing layer: counts.sam1
## Normalizing layer: counts.sam2
#sam1 <- NormalizeData(sam1)
#sam2 <- NormalizeData(sam2)

Finding variable genes

Merged_sam <- FindVariableFeatures(Merged_sam, selection.method =  "vst", nfeatures = 2000)
## Finding variable features for layer counts.sam1
## Finding variable features for layer counts.sam2
#sam1 <- FindVariableFeatures(sam1,  selection.method = "vst", nfeatures = 2000)
#sam2 <- FindVariableFeatures(sam2,  selection.method = "vst", nfeatures = 2000)

Scale data

Merged_sam <- ScaleData(Merged_sam)
## Centering and scaling data matrix
#sam1 <- ScaleData(sam1)
#sam2 <- ScaleData(sam2)

Run PCA

Merged_sam <- RunPCA(Merged_sam)
## PC_ 1 
## Positive:  FTL, SRGN, S100A4, CD52, FCER1G, HLA-DPB1, S100A9, HLA-DPA1, CTSS, FCGR3A 
##     SCGB2A2, C1QB, C1QA, CCL5, C1orf162, NKG7, AIF1, ALOX5AP, LYZ, CD14 
##     HLA-DRA, C1QC, S100A8, GZMA, TFF3, APOE, CST7, GNLY, MS4A6A, VSIG4 
## Negative:  C1R, CALD1, C1S, ID4, RARRES2, CCDC80, UPK3B, COL1A2, KLK11, TM4SF1 
##     PROCR, PDPN, NNMT, ID1, COL1A1, CFI, IGFBP6, C3, PTGIS, EGFL6 
##     TPM2, SERPING1, RARRES1, COL6A2, PLA2G2A, CFH, HTRA1, PLAT, COL8A1, CALB2 
## PC_ 2 
## Positive:  KRT19, PPDPF, XBP1, STARD10, SCGB2A2, TFF3, KRT18, COX6C, S100P, CD24 
##     BAMBI, SCGB1D2, METRN, KRT8, ADIRF, NQO1, FXYD3, AGR2, CCL5, RAB11FIP1 
##     SMIM22, ELF3, NKG7, MDK, SNCG, GZMA, GNLY, MLPH, CRABP2, CCND1 
## Negative:  SLC11A1, CYBB, SPI1, CD68, CD163, VSIG4, CD14, CFD, AIF1, MS4A6A 
##     LYZ, C3AR1, CPVL, MARCO, HLA-DRB1, MS4A4A, CTSB, MS4A7, C1QC, HLA-DRA 
##     C5AR1, CTSS, LST1, CD74, ENG, FN1, S100A8, FCER1G, CSF1R, MAFB 
## PC_ 3 
## Positive:  NKG7, CCL5, GZMA, CST7, GNLY, FGFBP2, GZMB, MALAT1, CTSW, CD52 
##     KLRB1, CD7, GZMH, ZFP36L2, TRBC1, TRBC2, IL32, CLIC3, GYPC, CD3D 
##     TRAC, CCL4, CD3G, TRDC, RARRES3, S100A4, SPOCK2, DUSP2, C12orf75, CCL4L2 
## Negative:  KRT19, COX6C, STARD10, XBP1, TFF3, KRT18, SCGB2A2, S100P, CCND1, ADIRF 
##     BAMBI, CD24, KRT8, RAB11FIP1, HSPB1, AGR2, NANS, METRN, FXYD3, MDK 
##     SCGB1D2, ELF3, UQCRQ, PPFIA1, SMIM22, NQO1, SPINT2, SNCG, CYB5A, MLPH 
## PC_ 4 
## Positive:  SCGB2A2, TFF3, XBP1, SCGB1D2, KRT19, S100P, FTL, CD24, ADIRF, STARD10 
##     FXYD3, KRT18, CCND1, AGR2, COX6C, CGA, KRT8, RAB11FIP1, APOD, SMIM22 
##     CEMIP, C1QB, APOE, CLU, CA12, IL13RA2, HP, TFPI2, WFDC2, CHI3L1 
## Negative:  TYMS, MKI67, NUSAP1, PCLAF, CDKN3, BIRC5, TOP2A, STMN1, TK1, CCNB2 
##     CENPF, TPX2, PTTG1, DLGAP5, CDCA3, UBE2C, UBE2T, HMGB2, CEP55, HMMR 
##     MAD2L1, RRM2, ZWINT, CCNA2, CENPA, GTSE1, CDK1, KIF23, CENPM, ASPM 
## PC_ 5 
## Positive:  BDKRB1, ARSI, EMP1, SERPINE1, SBSN, MAP1B, TPPP3, ANGPTL2, DPYSL3, HAS1 
##     FSTL3, SERPINB2, AHNAK2, C11orf96, CAV1, HSPG2, LUM, NOV, PODXL, COL5A1 
##     CADM3, MEG3, SPNS2, LINC01133, FLNC, TIMP3, CAV2, S100A3, APOA1, GJB2 
## Negative:  IL13RA2, CEMIP, GABRB2, ADAMTS9, EPGN, FMO1, ADH1B, CILP, PCP4, CLIC5 
##     TCEAL6, STEAP1, TCEAL2, CSRP2, FLRT2, ALDH8A1, CHI3L1, STEAP2, MGARP, OSR1 
##     FST, MYLK, SLC40A1, CHRDL1, TRDC, CST6, PRRX1, ETV7, NR2F1, GRIA2
#sam1 <- RunPCA(sam1)
#sam2 <- RunPCA(sam2)
DimPlot(Merged_sam, reduction = "pca") #+ NoLegend()

DimPlot(Merged_sam, reduction = "pca", dim = c(1,5))

VizDimLoadings(Merged_sam, dims = 1:2, reduction = "pca")

Run Harmony Integration

#install.packages('harmony')
library(harmony)
## Warning: package 'harmony' was built under R version 4.4.1
## Loading required package: Rcpp
## Warning: package 'Rcpp' was built under R version 4.4.1
## Run Harmony Integration
options(repr.plot.height = 2.5, repr.plot.width = 6)
Merged_sam <- RunHarmony(Merged_sam,"orig.ident", plot_convergence = TRUE)
## Transposing data matrix
## Initializing state using k-means centroids initialization
## Harmony 1/10
## Harmony 2/10
## Harmony 3/10
## Harmony 4/10
## Harmony 5/10
## Harmony converged after 5 iterations

## Warning: The `slot` argument of `GetAssayData()` is deprecated as of SeuratObject 5.0.0.
## ℹ Please use the `layer` argument instead.
## ℹ The deprecated feature was likely used in the Seurat package.
##   Please report the issue at <https://github.com/satijalab/seurat/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Run Clustering

Merged_sam <- RunUMAP(Merged_sam, reduction = 'harmony', dims = 1:30,
                           reduction.name = 'umap.harmony')
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 11:59:21 UMAP embedding parameters a = 0.9922 b = 1.112
## 11:59:21 Read 12315 rows and found 30 numeric columns
## 11:59:21 Using Annoy for neighbor search, n_neighbors = 30
## 11:59:21 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 11:59:22 Writing NN index file to temp file /var/folders/ql/4mkyx9c51q3d4h02kxsxhtj80000gn/T//RtmpfjxHbq/file33984e632c9c
## 11:59:22 Searching Annoy index using 1 thread, search_k = 3000
## 11:59:24 Annoy recall = 100%
## 11:59:24 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 11:59:24 Initializing from normalized Laplacian + noise (using RSpectra)
## 11:59:25 Commencing optimization for 200 epochs, with 551504 positive edges
## 11:59:25 Using rng type: pcg
## 11:59:30 Optimization finished
Merged_sam <- FindNeighbors(Merged_sam, reduction = 'harmony', dims = 1:30)
## Computing nearest neighbor graph
## Warning: package 'future' was built under R version 4.4.1
## Computing SNN
Merged_sam <- FindClusters(Merged_sam, resolution = 0.5)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 12315
## Number of edges: 509960
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9189
## Number of communities: 14
## Elapsed time: 1 seconds
Merged_sam <- FindClusters(Merged_sam, resolution = 0.1)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 12315
## Number of edges: 509960
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9768
## Number of communities: 8
## Elapsed time: 1 seconds
DimPlot(Merged_sam, reduction ="umap.harmony", group.by = "orig.ident")

p2 <- DimPlot(
  Merged_sam,
  reduction = "umap.harmony",
  group.by = c("orig.ident","RNA_snn_res.0.1"),
  combine = FALSE, label.size = 2
)
p2
## [[1]]

## 
## [[2]]

Joining layers

Merged_sam <- JoinLayers(Merged_sam)

Finding marker genes in the clusters.

Merged_sam.markers <- FindAllMarkers(Merged_sam, only.pos = TRUE)
## Calculating cluster 0
## Calculating cluster 1
## Calculating cluster 2
## Calculating cluster 3
## Calculating cluster 4
## Calculating cluster 5
## Calculating cluster 6
## Calculating cluster 7

Filtering the marker gene list to keep the most upregulated genes

top_markers<- Merged_sam.markers %>%
    group_by(cluster) %>%
    dplyr::filter(avg_log2FC > 1)
write.csv(top_markers, file = "top_markers.csv", row.names = FALSE)
#top10 <- Merged_sam.markers %>%
 # group_by(cluster) %>%
  #top_n(n = 1, wt = avg_log2FC)
Merged_sam.markers %>%
    group_by(cluster) %>%
    dplyr::filter(avg_log2FC > 1) %>%
    slice_head(n = 10) %>%
    ungroup() -> top10

Plotting the upregulated genes in each cluster

FeaturePlot(Merged_sam, features = c("S100P", "SLC11A1", "C1R", "KLRB1", "TRAC", "NUSAP1", "HLA-DQB2", "TCL1A"))

DoHeatmap(Merged_sam, features = top10$gene) + NoLegend()
## Warning in DoHeatmap(Merged_sam, features = top10$gene): The following features
## were omitted as they were not found in the scale.data slot for the RNA assay:
## TSPAN33, AL138899.1, GZMM, CD2, CD3E, KLRD1

Tryig to run SingleR on the sample clusters for cell annotation

Getting normalized and raw counts

raw_counts <- LayerData(Merged_sam, assay = "RNA", layer = 'counts')
raw_counts[c("S100P", "SLC11A1", "C1R", "KLRB1", "TRAC"),1:2]
## 5 x 2 sparse Matrix of class "dgCMatrix"
##         sam1_AAACCCAAGACGGTTG-1 sam1_AAACCCAAGGTCACAG-1
## S100P                         4                       .
## SLC11A1                       .                       .
## C1R                          34                       .
## KLRB1                         .                       .
## TRAC                          .                       .
norm_counts <- LayerData(Merged_sam, assay = "RNA", layer = 'data')
norm_counts[c("S100P", "SLC11A1", "C1R", "KLRB1", "TRAC"),1:2]
## 5 x 2 sparse Matrix of class "dgCMatrix"
##         sam1_AAACCCAAGACGGTTG-1 sam1_AAACCCAAGGTCACAG-1
## S100P                  1.041993                       .
## SLC11A1                .                              .
## C1R                    2.809182                       .
## KLRB1                  .                              .
## TRAC                   .                              .

Install Tidyverse and celldex

#install.packages("tidyverse")
#BiocManager::install("celldex")
library(tidyverse)
## Warning: package 'ggplot2' was built under R version 4.4.1
## Warning: package 'purrr' was built under R version 4.4.1
## Warning: package 'lubridate' was built under R version 4.4.1
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ readr     2.1.5
## ✔ ggplot2   3.5.2     ✔ stringr   1.5.1
## ✔ lubridate 1.9.4     ✔ tibble    3.2.1
## ✔ purrr     1.0.4     ✔ tidyr     1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
#install.packages("ggplot2")
library(ggplot2)
library(celldex)
## Loading required package: SummarizedExperiment
## Warning: package 'SummarizedExperiment' was built under R version 4.4.1
## Loading required package: MatrixGenerics
## Warning: package 'MatrixGenerics' was built under R version 4.4.2
## Loading required package: matrixStats
## Warning: package 'matrixStats' was built under R version 4.4.1
## 
## Attaching package: 'matrixStats'
## 
## The following object is masked from 'package:dplyr':
## 
##     count
## 
## 
## Attaching package: 'MatrixGenerics'
## 
## The following objects are masked from 'package:matrixStats':
## 
##     colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
##     colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
##     colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
##     colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
##     colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
##     colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
##     colWeightedMeans, colWeightedMedians, colWeightedSds,
##     colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
##     rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
##     rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
##     rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
##     rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
##     rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
##     rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
##     rowWeightedSds, rowWeightedVars
## 
## Loading required package: GenomicRanges
## Warning: package 'GenomicRanges' was built under R version 4.4.1
## Loading required package: stats4
## Loading required package: BiocGenerics
## Warning: package 'BiocGenerics' was built under R version 4.4.1
## 
## Attaching package: 'BiocGenerics'
## 
## The following objects are masked from 'package:lubridate':
## 
##     intersect, setdiff, union
## 
## The following object is masked from 'package:SeuratObject':
## 
##     intersect
## 
## The following objects are masked from 'package:dplyr':
## 
##     combine, intersect, setdiff, union
## 
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## 
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, aperm, append, as.data.frame, basename, cbind,
##     colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
##     get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
##     match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
##     Position, rank, rbind, Reduce, rownames, sapply, saveRDS, setdiff,
##     table, tapply, union, unique, unsplit, which.max, which.min
## 
## Loading required package: S4Vectors
## Warning: package 'S4Vectors' was built under R version 4.4.1
## 
## Attaching package: 'S4Vectors'
## 
## The following objects are masked from 'package:lubridate':
## 
##     second, second<-
## 
## The following object is masked from 'package:tidyr':
## 
##     expand
## 
## The following objects are masked from 'package:dplyr':
## 
##     first, rename
## 
## The following object is masked from 'package:utils':
## 
##     findMatches
## 
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
## 
## Loading required package: IRanges
## Warning: package 'IRanges' was built under R version 4.4.2
## 
## Attaching package: 'IRanges'
## 
## The following object is masked from 'package:lubridate':
## 
##     %within%
## 
## The following object is masked from 'package:purrr':
## 
##     reduce
## 
## The following object is masked from 'package:sp':
## 
##     %over%
## 
## The following objects are masked from 'package:dplyr':
## 
##     collapse, desc, slice
## 
## Loading required package: GenomeInfoDb
## Warning: package 'GenomeInfoDb' was built under R version 4.4.2
## Loading required package: Biobase
## Warning: package 'Biobase' was built under R version 4.4.1
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## 
## 
## Attaching package: 'Biobase'
## 
## The following object is masked from 'package:MatrixGenerics':
## 
##     rowMedians
## 
## The following objects are masked from 'package:matrixStats':
## 
##     anyMissing, rowMedians
## 
## 
## Attaching package: 'SummarizedExperiment'
## 
## The following object is masked from 'package:Seurat':
## 
##     Assays
## 
## The following object is masked from 'package:SeuratObject':
## 
##     Assays

Get reference dataset from celldex

ref <- celldex::HumanPrimaryCellAtlasData()
unique(ref$label.main)
##  [1] "DC"                   "Smooth_muscle_cells"  "Epithelial_cells"    
##  [4] "B_cell"               "Neutrophils"          "T_cells"             
##  [7] "Monocyte"             "Erythroblast"         "BM & Prog."          
## [10] "Endothelial_cells"    "Gametocytes"          "Neurons"             
## [13] "Keratinocytes"        "HSC_-G-CSF"           "Macrophage"          
## [16] "NK_cell"              "Embryonic_stem_cells" "Tissue_stem_cells"   
## [19] "Chondrocytes"         "Osteoblasts"          "BM"                  
## [22] "Platelets"            "Fibroblasts"          "iPS_cells"           
## [25] "Hepatocytes"          "MSC"                  "Neuroepithelial_cell"
## [28] "Astrocyte"            "HSC_CD34+"            "CMP"                 
## [31] "GMP"                  "MEP"                  "Myelocyte"           
## [34] "Pre-B_cell_CD34-"     "Pro-B_cell_CD34+"     "Pro-Myelocyte"
unique(ref$label.fine)
##   [1] "DC:monocyte-derived:immature"                          
##   [2] "DC:monocyte-derived:Galectin-1"                        
##   [3] "DC:monocyte-derived:LPS"                               
##   [4] "DC:monocyte-derived"                                   
##   [5] "Smooth_muscle_cells:bronchial:vit_D"                   
##   [6] "Smooth_muscle_cells:bronchial"                         
##   [7] "Epithelial_cells:bronchial"                            
##   [8] "B_cell"                                                
##   [9] "Neutrophil"                                            
##  [10] "T_cell:CD8+_Central_memory"                            
##  [11] "T_cell:CD8+"                                           
##  [12] "T_cell:CD4+"                                           
##  [13] "T_cell:CD8+_effector_memory_RA"                        
##  [14] "T_cell:CD8+_effector_memory"                           
##  [15] "T_cell:CD8+_naive"                                     
##  [16] "Monocyte"                                              
##  [17] "Erythroblast"                                          
##  [18] "BM"                                                    
##  [19] "DC:monocyte-derived:rosiglitazone"                     
##  [20] "DC:monocyte-derived:AM580"                             
##  [21] "DC:monocyte-derived:rosiglitazone/AGN193109"           
##  [22] "DC:monocyte-derived:anti-DC-SIGN_2h"                   
##  [23] "Endothelial_cells:HUVEC"                               
##  [24] "Endothelial_cells:HUVEC:Borrelia_burgdorferi"          
##  [25] "Endothelial_cells:HUVEC:IFNg"                          
##  [26] "Endothelial_cells:lymphatic"                           
##  [27] "Endothelial_cells:HUVEC:Serum_Amyloid_A"               
##  [28] "Endothelial_cells:lymphatic:TNFa_48h"                  
##  [29] "T_cell:effector"                                       
##  [30] "T_cell:CCR10+CLA+1,25(OH)2_vit_D3/IL-12"               
##  [31] "T_cell:CCR10-CLA+1,25(OH)2_vit_D3/IL-12"               
##  [32] "Gametocytes:spermatocyte"                              
##  [33] "DC:monocyte-derived:A._fumigatus_germ_tubes_6h"        
##  [34] "Neurons:ES_cell-derived_neural_precursor"              
##  [35] "Keratinocytes"                                         
##  [36] "Keratinocytes:IL19"                                    
##  [37] "Keratinocytes:IL20"                                    
##  [38] "Keratinocytes:IL22"                                    
##  [39] "Keratinocytes:IL24"                                    
##  [40] "Keratinocytes:IL26"                                    
##  [41] "Keratinocytes:KGF"                                     
##  [42] "Keratinocytes:IFNg"                                    
##  [43] "Keratinocytes:IL1b"                                    
##  [44] "HSC_-G-CSF"                                            
##  [45] "DC:monocyte-derived:mature"                            
##  [46] "Monocyte:anti-FcgRIIB"                                 
##  [47] "Macrophage:monocyte-derived:IL-4/cntrl"                
##  [48] "Macrophage:monocyte-derived:IL-4/Dex/cntrl"            
##  [49] "Macrophage:monocyte-derived:IL-4/Dex/TGFb"             
##  [50] "Macrophage:monocyte-derived:IL-4/TGFb"                 
##  [51] "Monocyte:leukotriene_D4"                               
##  [52] "NK_cell"                                               
##  [53] "NK_cell:IL2"                                           
##  [54] "Embryonic_stem_cells"                                  
##  [55] "Tissue_stem_cells:iliac_MSC"                           
##  [56] "Chondrocytes:MSC-derived"                              
##  [57] "Osteoblasts"                                           
##  [58] "Tissue_stem_cells:BM_MSC"                              
##  [59] "Osteoblasts:BMP2"                                      
##  [60] "Tissue_stem_cells:BM_MSC:BMP2"                         
##  [61] "Tissue_stem_cells:BM_MSC:TGFb3"                        
##  [62] "DC:monocyte-derived:Poly(IC)"                          
##  [63] "DC:monocyte-derived:CD40L"                             
##  [64] "DC:monocyte-derived:Schuler_treatment"                 
##  [65] "DC:monocyte-derived:antiCD40/VAF347"                   
##  [66] "Tissue_stem_cells:dental_pulp"                         
##  [67] "T_cell:CD4+_central_memory"                            
##  [68] "T_cell:CD4+_effector_memory"                           
##  [69] "T_cell:CD4+_Naive"                                     
##  [70] "Smooth_muscle_cells:vascular"                          
##  [71] "Smooth_muscle_cells:vascular:IL-17"                    
##  [72] "Platelets"                                             
##  [73] "Epithelial_cells:bladder"                              
##  [74] "Macrophage:monocyte-derived"                           
##  [75] "Macrophage:monocyte-derived:M-CSF"                     
##  [76] "Macrophage:monocyte-derived:M-CSF/IFNg"                
##  [77] "Macrophage:monocyte-derived:M-CSF/Pam3Cys"             
##  [78] "Macrophage:monocyte-derived:M-CSF/IFNg/Pam3Cys"        
##  [79] "Macrophage:monocyte-derived:IFNa"                      
##  [80] "Gametocytes:oocyte"                                    
##  [81] "Monocyte:F._tularensis_novicida"                       
##  [82] "Endothelial_cells:HUVEC:B._anthracis_LT"               
##  [83] "B_cell:Germinal_center"                                
##  [84] "B_cell:Plasma_cell"                                    
##  [85] "B_cell:Naive"                                          
##  [86] "B_cell:Memory"                                         
##  [87] "DC:monocyte-derived:AEC-conditioned"                   
##  [88] "Tissue_stem_cells:lipoma-derived_MSC"                  
##  [89] "Tissue_stem_cells:adipose-derived_MSC_AM3"             
##  [90] "Endothelial_cells:HUVEC:FPV-infected"                  
##  [91] "Endothelial_cells:HUVEC:PR8-infected"                  
##  [92] "Endothelial_cells:HUVEC:H5N1-infected"                 
##  [93] "Macrophage:monocyte-derived:S._aureus"                 
##  [94] "Fibroblasts:foreskin"                                  
##  [95] "iPS_cells:skin_fibroblast-derived"                     
##  [96] "iPS_cells:skin_fibroblast"                             
##  [97] "T_cell:gamma-delta"                                    
##  [98] "Monocyte:CD14+"                                        
##  [99] "Macrophage:Alveolar"                                   
## [100] "Macrophage:Alveolar:B._anthacis_spores"                
## [101] "Neutrophil:inflam"                                     
## [102] "iPS_cells:PDB_fibroblasts"                             
## [103] "iPS_cells:PDB_1lox-17Puro-5"                           
## [104] "iPS_cells:PDB_1lox-17Puro-10"                          
## [105] "iPS_cells:PDB_1lox-21Puro-20"                          
## [106] "iPS_cells:PDB_1lox-21Puro-26"                          
## [107] "iPS_cells:PDB_2lox-5"                                  
## [108] "iPS_cells:PDB_2lox-22"                                 
## [109] "iPS_cells:PDB_2lox-21"                                 
## [110] "iPS_cells:PDB_2lox-17"                                 
## [111] "iPS_cells:CRL2097_foreskin"                            
## [112] "iPS_cells:CRL2097_foreskin-derived:d20_hepatic_diff"   
## [113] "iPS_cells:CRL2097_foreskin-derived:undiff."            
## [114] "B_cell:CXCR4+_centroblast"                             
## [115] "B_cell:CXCR4-_centrocyte"                              
## [116] "Endothelial_cells:HUVEC:VEGF"                          
## [117] "iPS_cells:fibroblasts"                                 
## [118] "iPS_cells:fibroblast-derived:Direct_del._reprog"       
## [119] "iPS_cells:fibroblast-derived:Retroviral_transf"        
## [120] "Endothelial_cells:lymphatic:KSHV"                      
## [121] "Endothelial_cells:blood_vessel"                        
## [122] "Monocyte:CD16-"                                        
## [123] "Monocyte:CD16+"                                        
## [124] "Tissue_stem_cells:BM_MSC:osteogenic"                   
## [125] "Hepatocytes"                                           
## [126] "Neutrophil:uropathogenic_E._coli_UTI89"                
## [127] "Neutrophil:commensal_E._coli_MG1655"                   
## [128] "MSC"                                                   
## [129] "Neuroepithelial_cell:ESC-derived"                      
## [130] "Astrocyte:Embryonic_stem_cell-derived"                 
## [131] "Endothelial_cells:HUVEC:IL-1b"                         
## [132] "HSC_CD34+"                                             
## [133] "CMP"                                                   
## [134] "GMP"                                                   
## [135] "B_cell:immature"                                       
## [136] "MEP"                                                   
## [137] "Myelocyte"                                             
## [138] "Pre-B_cell_CD34-"                                      
## [139] "Pro-B_cell_CD34+"                                      
## [140] "Pro-Myelocyte"                                         
## [141] "Smooth_muscle_cells:umbilical_vein"                    
## [142] "iPS_cells:foreskin_fibrobasts"                         
## [143] "iPS_cells:iPS:minicircle-derived"                      
## [144] "iPS_cells:adipose_stem_cells"                          
## [145] "iPS_cells:adipose_stem_cell-derived:lentiviral"        
## [146] "iPS_cells:adipose_stem_cell-derived:minicircle-derived"
## [147] "Fibroblasts:breast"                                    
## [148] "Monocyte:MCSF"                                         
## [149] "Monocyte:CXCL4"                                        
## [150] "Neurons:adrenal_medulla_cell_line"                     
## [151] "Tissue_stem_cells:CD326-CD56+"                         
## [152] "NK_cell:CD56hiCD62L+"                                  
## [153] "T_cell:Treg:Naive"                                     
## [154] "Neutrophil:LPS"                                        
## [155] "Neutrophil:GM-CSF_IFNg"                                
## [156] "Monocyte:S._typhimurium_flagellin"                     
## [157] "Neurons:Schwann_cell"

install SingleR

#BiocManager::install("SingleR")
library(SingleR)
## Warning: package 'SingleR' was built under R version 4.4.1
## 
## Attaching package: 'SingleR'
## The following objects are masked from 'package:celldex':
## 
##     BlueprintEncodeData, DatabaseImmuneCellExpressionData,
##     HumanPrimaryCellAtlasData, ImmGenData, MonacoImmuneData,
##     MouseRNAseqData, NovershternHematopoieticData
#BiocManager::install("scran")
library(scran)
## Warning: package 'scran' was built under R version 4.4.1
## Loading required package: SingleCellExperiment
## Warning: package 'SingleCellExperiment' was built under R version 4.4.1
## Loading required package: scuttle
## Warning: package 'scuttle' was built under R version 4.4.1

Run singleR on normalized data

ct_ann <- SingleR(test = norm_counts, # we could also use sce or raw_counts
                  ref = ref, 
                  labels = ref$label.main,
                  de.method = 'wilcox')
## Warning in FUN(...): no within-block comparison between Neuroepithelial_cell
## and BM & Prog.
#library("magrittr")
ct_ann %>% head()
## DataFrame with 6 rows and 4 columns
##                                                 scores           labels
##                                               <matrix>      <character>
## sam1_AAACCCAAGACGGTTG-1 0.403145:0.345071:0.349268:...     Chondrocytes
## sam1_AAACCCAAGGTCACAG-1 0.166320:0.188311:0.200635:...       Macrophage
## sam1_AAACCCAAGTGCGTCC-1 0.173666:0.196706:0.213181:... Pre-B_cell_CD34-
## sam1_AAACCCACACGCTATA-1 0.127044:0.168634:0.181017:...               DC
## sam1_AAACCCAGTTTCCCAC-1 0.161358:0.172904:0.179275:...       Macrophage
## sam1_AAACCCATCTTGATTC-1 0.201712:0.266665:0.320877:...       Macrophage
##                          delta.next    pruned.labels
##                           <numeric>      <character>
## sam1_AAACCCAAGACGGTTG-1 0.053144843     Chondrocytes
## sam1_AAACCCAAGGTCACAG-1 0.004849993       Macrophage
## sam1_AAACCCAAGTGCGTCC-1 0.000941892 Pre-B_cell_CD34-
## sam1_AAACCCACACGCTATA-1 0.002488869               DC
## sam1_AAACCCAGTTTCCCAC-1 0.001034198       Macrophage
## sam1_AAACCCATCTTGATTC-1 0.204236805       Macrophage
unique(ct_ann$pruned.labels)
##  [1] "Chondrocytes"         "Macrophage"           "Pre-B_cell_CD34-"    
##  [4] "DC"                   "Fibroblasts"          "Pro-B_cell_CD34+"    
##  [7] "Epithelial_cells"     "GMP"                  "MSC"                 
## [10] "T_cells"              "B_cell"               "NK_cell"             
## [13] "Hepatocytes"          "HSC_-G-CSF"           "Monocyte"            
## [16] "Smooth_muscle_cells"  "HSC_CD34+"            "Endothelial_cells"   
## [19] NA                     "Pro-Myelocyte"        "Neutrophils"         
## [22] "Platelets"            "Osteoblasts"          "Keratinocytes"       
## [25] "Tissue_stem_cells"    "Embryonic_stem_cells" "Neurons"             
## [28] "CMP"                  "Myelocyte"            "iPS_cells"           
## [31] "Neuroepithelial_cell" "BM"                   "Astrocyte"           
## [34] "BM & Prog."           "Erythroblast"         "MEP"                 
## [37] "Gametocytes"
table(ct_ann$pruned.labels)
## 
##            Astrocyte               B_cell                   BM 
##                    5                   70                  148 
##           BM & Prog.         Chondrocytes                  CMP 
##                   13                  129                  152 
##                   DC Embryonic_stem_cells    Endothelial_cells 
##                  441                   21                   78 
##     Epithelial_cells         Erythroblast          Fibroblasts 
##                 1648                    4                  700 
##          Gametocytes                  GMP          Hepatocytes 
##                    1                  182                  135 
##           HSC_-G-CSF            HSC_CD34+            iPS_cells 
##                  182                   53                    9 
##        Keratinocytes           Macrophage                  MEP 
##                   56                 2964                    8 
##             Monocyte                  MSC            Myelocyte 
##                  312                  219                   26 
## Neuroepithelial_cell              Neurons          Neutrophils 
##                    7                   33                   16 
##              NK_cell          Osteoblasts            Platelets 
##                 1367                   15                    6 
##     Pre-B_cell_CD34-     Pro-B_cell_CD34+        Pro-Myelocyte 
##                  515                   97                  108 
##  Smooth_muscle_cells              T_cells    Tissue_stem_cells 
##                  935                 1436                   33
print("ct_ann$labels")
## [1] "ct_ann$labels"
table(ct_ann$labels)
## 
##            Astrocyte               B_cell                   BM 
##                    6                   70                  148 
##           BM & Prog.         Chondrocytes                  CMP 
##                   13                  139                  152 
##                   DC Embryonic_stem_cells    Endothelial_cells 
##                  441                   21                   79 
##     Epithelial_cells         Erythroblast          Fibroblasts 
##                 1648                    4                  758 
##          Gametocytes                  GMP          Hepatocytes 
##                    1                  182                  135 
##           HSC_-G-CSF            HSC_CD34+            iPS_cells 
##                  182                   53                    9 
##        Keratinocytes           Macrophage                  MEP 
##                   56                 3004                    9 
##             Monocyte                  MSC            Myelocyte 
##                  312                  226                   26 
## Neuroepithelial_cell              Neurons          Neutrophils 
##                    7                   33                   16 
##              NK_cell          Osteoblasts            Platelets 
##                 1367                   19                    7 
##     Pre-B_cell_CD34-     Pro-B_cell_CD34+        Pro-Myelocyte 
##                  515                   97                  110 
##  Smooth_muscle_cells              T_cells    Tissue_stem_cells 
##                  998                 1439                   33
summary(is.na(ct_ann$pruned.labels))
##    Mode   FALSE    TRUE 
## logical   12124     191
plotScoreHeatmap(ct_ann)

plotDeltaDistribution(ct_ann, ncol = 4, dots.on.top = FALSE)
## Warning: Groups with fewer than two datapoints have been dropped.
## ℹ Set `drop = FALSE` to consider such groups for position adjustment purposes.
## Warning in max(data$density, na.rm = TRUE): no non-missing arguments to max;
## returning -Inf
## Warning: Computation failed in `stat_ydensity()`.
## Caused by error in `$<-.data.frame`:
## ! replacement has 1 row, data has 0

rownames(ct_ann)[1:2] # make sure you have cell IDs
## [1] "sam1_AAACCCAAGACGGTTG-1" "sam1_AAACCCAAGGTCACAG-1"
Merged_sam<- AddMetaData(Merged_sam, ct_ann$pruned.labels, col.name = 'SingleR_HCA')
Merged_sam <- SetIdent(Merged_sam, value = "SingleR_HCA")
DimPlot(Merged_sam, label = T , repel = T, label.size = 3) + NoLegend()
## Warning: ggrepel: 12 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps